*** This code pulls data to study pre-populated tax returns

* Session setup: close any log left open by an earlier run, set session
* defaults, move to the project directory and open this script's log.
cap log close
set more off
set type double
set linesize 150

* Fail fast when a path global used by this script is undefined. An empty
* ${gdir} would turn -cd- into a bare -cd-, which does not move into the
* project directory, and an empty ${statadir} would only surface at the final
* -save-, after both INSOLE files have already been processed.
if `"${gdir}"' == "" {
	display as error "global gdir is not defined"
	exit 198
}
if `"${logdir}"' == "" {
	display as error "global logdir is not defined"
	exit 198
}
if `"${statadir}"' == "" {
	display as error "global statadir is not defined"
	exit 198
}

* Paths are quoted so that directories containing spaces still work.
cd "${gdir}"

log using "${logdir}/get_data/pre_sas/insole_pull.log" , replace


*********************
*** Variable list ***
*********************

* Columns to read from each INSOLE file. The list is built one group at a
* time; the overall order of names is unchanged.

* Record id, the two fields used to filter rows, filer TINs, weight,
* filing status, filer birth years and the CTC dependent count
local vlist "rec_id flpdyr reject s002 s003 s006 mars dobyr sdobyr chldelignm"

* Dependent fields 1-10 (s025-s034), the three EITC dependent TIN fields,
* and the per-dependent xtxcr / tdep*dob fields
local vlist "`vlist' s025 s026 s027 s028 s029 s030 s031 s032 s033 s034"
local vlist "`vlist' s053 s054 s055"
local vlist "`vlist' xtxcr1 xtxcr2 xtxcr3 xtxcr4 xtxcr5 xtxcr6 xtxcr7 xtxcr8 xtxcr9 xtxcr10"
local vlist "`vlist' tdep1dob tdep2dob tdep3dob tdep4dob tdep5dob tdep6dob tdep7dob tdep8dob tdep9dob tdep10dob"

* Components that are later summed into soi_other_credits
local vlist "`vlist' e11400 e11500 e11555 e10610"

* Remaining return fields (each is renamed to a soi_* name after loading)
local vlist "`vlist' e00200 e00400 e00300 e00650 e00600 e01300 e01400 e01500 e01700 e02400 e01000"
local vlist "`vlist' fded e02655 e02900 e00100 e04475 e04800"
local vlist "`vlist' e07225 e11070 e09200 e10700 e11000 e10960 e07230 e11900 e12200"
local vlist "`vlist' e00700 e00800 e03500 e01200 e02300 e02800"
local vlist "`vlist' e03220 e03700 e03290 e03280 e03300 e03270 e03400 e03150 e03210"
local vlist "`vlist' e09600 e05780 e10050"
local vlist "`vlist' e07300 e07180 e07240 e07260 e07105"
local vlist "`vlist' e10900 e11560 e11100 e11200 e11300 e11590"
local vlist "`vlist' e19700 e18500 e18800 e17000 e20500 e21020"
local vlist "`vlist' e00900 e21602 e21603 e22250 e22302 e22303 e23250"
local vlist "`vlist' e25350 e25360 e25870 e26200 e26250 e02100"
local vlist "`vlist' e29150_0 e30700_0 e85190 s53937 t88002 e87530 e85770 prep"



*********************
*** Pull SOI data ***
*********************

* For each INSOLE file (19 and 20): load the listed columns for rows with
* flpdyr==2019 and reject==0, give every field a descriptive soi_* name, and
* store the result in the tempfile `soi_19' / `soi_20'.
forvalues yr = 19/20 {

	use `vlist' using "/home/projects/insole/Individual/insole`yr'.dta" if flpdyr==2019 & reject==0 , clear

	* The filter fields are constant after the -if- above
	drop flpdyr reject

	* Collapse four credit fields into a single total, then discard the parts
	gen soi_other_credits = e11400 + e11500 + e11555 + e10610
	drop e11400 e11500 e11555 e10610

	* Each quoted element is an "old_name new_name" pair handed to -rename-.
	* NOTE: SOI messed up the order of S053 - S055, hence s054/s055/s053
	*       mapping to EITC dependents 1/2/3.
	foreach pair in ///
		"s002 soi_prim_unmasked_tin" ///
		"s003 soi_sec_unmasked_tin" ///
		"s006 soi_wgt" ///
		"mars soi_fil_stat" ///
		"dobyr soi_prim_yob" ///
		"sdobyr soi_sec_yob" ///
		"chldelignm soi_deps_tot_ctc" ///
		"s054 soi_dep1_eitc_unmasked_tin" ///
		"s055 soi_dep2_eitc_unmasked_tin" ///
		"s053 soi_dep3_eitc_unmasked_tin" ///
		"e00200 soi_wages" ///
		"e00400 soi_interest_txexmpt" ///
		"e00300 soi_interest_txbl" ///
		"e00650 soi_qual_divs" ///
		"e00600 soi_total_divs" ///
		"e01300 soi_ira_dist" ///
		"e01400 soi_ira_dist_txbl" ///
		"e01500 soi_pens_anns" ///
		"e01700 soi_pens_anns_txbl" ///
		"e02400 soi_soc_sec" ///
		"e01000 soi_cap_gains" ///
		"fded soi_fded" ///
		"e02655 soi_sched_1_incm" ///
		"e02900 soi_sched_1_deds" ///
		"e00100 soi_agi" ///
		"e04475 soi_qbi_ded" ///
		"e04800 soi_txbl_incm" ///
		"e07225 soi_ctc_nonrefundable" ///
		"e11070 soi_ctc_refundable" ///
		"e09200 soi_total_tax" ///
		"e10700 soi_tax_withheld" ///
		"e11000 soi_eitc" ///
		"e10960 soi_educ_refundable" ///
		"e07230 soi_educ_nonrefundable" ///
		"e11900 soi_tax_balance_due" ///
		"e12200 soi_late_penalty" ///
		"e00700 soi_txbl_refunds_etc" ///
		"e00800 soi_alimony_received" ///
		"e03500 soi_alimony_paid" ///
		"e01200 soi_other_gains_losses" ///
		"e02300 soi_ui" ///
		"e02800 soi_gambling_incm" ///
		"e03220 soi_educator_expenses" ///
		"e03700 soi_biz_expenses" ///
		"e03290 soi_hsa_ded" ///
		"e03280 soi_moving_expenses" ///
		"e03300 soi_se_qual_ret_plans" ///
		"e03270 soi_se_health_ins_ded" ///
		"e03400 soi_penalty_early_withdrawal" ///
		"e03150 soi_ira_ded" ///
		"e03210 soi_student_intrst_ded" ///
		"e09600 soi_amt" ///
		"e05780 soi_ptc_repayment" ///
		"e10050 soi_hh_emp_taxes" ///
		"e07300 soi_foreign_tax_credit" ///
		"e07180 soi_chld_dep_care_credit" ///
		"e07240 soi_savers_credit" ///
		"e07260 soi_res_energy_credit" ///
		"e07105 soi_sch3_nonrefndbl_credits" ///
		"e10900 soi_estmtd_tx_pymnts" ///
		"e11560 soi_nptc" ///
		"e11100 soi_tx_paid_w_extnsn" ///
		"e11200 soi_excess_sstax_withheld" ///
		"e11300 soi_fueltax_credit" ///
		"e11590 soi_sch3_payments_etc" ///
		"e19700 soi_sched_a_charitable" ///
		"e18500 soi_sched_a_real_estate_tax" ///
		"e18800 soi_sched_a_property_tax" ///
		"e17000 soi_sched_a_medical" ///
		"e20500 soi_sched_a_casualty_theft" ///
		"e21020 soi_sched_a_other" ///
		"e00900 soi_sched_c_netincm" ///
		"e21602 soi_sched_d_line2h" ///
		"e21603 soi_sched_d_line3h" ///
		"e22250 soi_sched_d_capgains_short" ///
		"e22302 soi_sched_d_line9h" ///
		"e22303 soi_sched_d_line10h" ///
		"e23250 soi_sched_d_capgains_long" ///
		"e25350 soi_sched_e_rents" ///
		"e25360 soi_sched_e_royalties" ///
		"e25870 soi_sched_e_net_rent_rylty" ///
		"e26200 soi_sched_e_prtscp_incm" ///
		"e26250 soi_sched_e_prtscp_loss" ///
		"e02100 soi_sched_f_netincm" ///
		"e29150_0 soi_seca_base" ///
		"e30700_0 soi_seca_tax" ///
		"e85190 soi_niit_base" ///
		"s53937 soi_unreported_tips" ///
		"t88002 soi_unreported_wages" ///
		"e87530 soi_qual_educ_expenses" ///
		"e85770 soi_ptc" ///
		"prep soi_prep" {

		rename `pair'
	}

	* Per-dependent fields: dependent k's TIN is stored in s0(k+24), i.e. s025-s034
	forvalues k = 1/10 {
		local tin_field = `k' + 24

		rename s0`tin_field' soi_dep`k'_unmasked_tin
		rename xtxcr`k'      soi_dep`k'_ctc
		rename tdep`k'dob    soi_dep`k'_yob

		* Convert to numeric where the field was stored as a string
		destring soi_dep`k'_yob , replace
	}

	compress
	duplicates drop

	* Keep this year's cleaned extract for the later steps
	tempfile soi_`yr'
	save `soi_`yr'' , replace
}



***********************************************************************************************
*** Identify TINs appearing in both the 2019 and 2020 INSOLE files, as primary or secondary ***
***********************************************************************************************

* Build, for each INSOLE file, the de-duplicated list of filer TINs (primary
* or secondary) in a single variable `ssn', then intersect the two lists.
*
* Zero TINs are excluded, as before. Missing TINs are now excluded as well:
* Stata treats a missing value as not equal to 0, so the `!=0' test alone would
* keep it. A missing value present in both files would then land in `dup_ssn'
* and the later m:1 merge would discard every 2020-file return whose primary
* or secondary TIN is missing.
forval yr = 19/20 {

	* Primary-filer TINs
	use soi_prim_unmasked_tin using `soi_`yr'' if soi_prim_unmasked_tin!=0 & !missing(soi_prim_unmasked_tin) , clear
	rename soi_prim_unmasked_tin ssn

	tempfile tin
	save    `tin' , replace

	* Secondary-filer TINs, stacked on top of the primary ones
	use soi_sec_unmasked_tin using `soi_`yr'' if soi_sec_unmasked_tin!=0 & !missing(soi_sec_unmasked_tin) , clear
	rename soi_sec_unmasked_tin ssn

	append using `tin'
	duplicates drop

	* Keep the 2019-file list on disk; the 2020-file list stays in memory
	* after the loop and is the master for the merge below
	if `yr'==19 {
		tempfile tin_19
		save    `tin_19' , replace
	}
}

* Both lists are unique on ssn, so a 1:1 merge is valid; matched rows are the
* TINs present in both files
merge 1:1 ssn using `tin_19'
	keep if _merge==3
	drop _merge

assert _N>0
tempfile dup_ssn
save    `dup_ssn' , replace
*NOTE: this is a list of SSNs that appear on 2019 tax returns (as primary or secondary filer) in
*      both the 2019 and 2020 INSOLE files



***************************
*** Append the SOI data ***
***************************

* Combine the two extracts into one file of 2019 returns. Returns from the
* 2020 INSOLE file are kept only when neither filer TIN is in `dup_ssn'.
use `soi_20' , clear

foreach xx in prim sec {

	* Temporarily expose this TIN under the merge key name used in `dup_ssn'
	rename soi_`xx'_unmasked_tin ssn

	* _merge==1 keeps rows found only in the 2020-file data, i.e. TINs that
	* are not on the duplicate list
	merge m:1 ssn using `dup_ssn'
		keep if _merge==1
		drop _merge

	rename ssn soi_`xx'_unmasked_tin
}
*NOTE: when SSNs appear in both the 2019 and 2020 INSOLE files (on 2019 tax returns),
*      we discard the 2020 file's return

* Flag the source file: 1 = 2020 INSOLE file, 0 = 2019 INSOLE file
gen byte d_2020insole = 1

append using `soi_19'

* Rows appended from the 2019 file have no value for the flag yet
replace d_2020insole = 0 if missing(d_2020insole)

compress

* Path is quoted so that a ${statadir} containing spaces still works
save "${statadir}/soi_2019.dta" , replace


